## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.0     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

1. Visualising AirBnB Occupancies and Revenue Generated Over Time

Calendar heatmaps are often used to discern patterns, trends, and anomalies over time in a calendar-like interface. In this case, they can be used to understand how AirBnB occupancies and revenue generated change over the period of 2019 to 2020.

## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion

a. Total Occupancies in 2019 and 2020

We can see that total occupancies in 2019 were generally at least twice that of occupancies in 2020.

## Loading required package: lattice
## Loading required package: grid
## Loading required package: chron
## 
## Attaching package: 'chron'
## The following objects are masked from 'package:lubridate':
## 
##     days, hours, minutes, seconds, years

b. Occupancy Rates in 2019 and 2020

Occupancy rates are severely reduced in 2020.

c. Total Daily Revenue in 2019 and 2020

Total daily revenue decreased from about 6 million dollars a day to 2 or 3 million dollars a day.

d. Average Daily Revenue in 2019 and 2020

Average daily revenues are also lower in 2020.

2. Seasonal Changes in AirBnB Occupancy and Revenue Generated

We can view the same patterns using line graphs instead.

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

3. Data Tables on Occupancy and Revenue of AirBnBs in New York City

The data tables allow users to see the number of bookings, listings, percentage of listings booked, total revenue, and average revenue for any day that they select.

5. What Makes a Host a Superhost?

We were interested to know what makes a host a superhost. We believe that response and acceptance rates could be linked closely to it.

## Bar chart of avgResponse for each value of host_is_superhost.
## The fill colour repeats the x variable, so the legend is switched off.
ggplot(
  superhost_summary,
  aes(x = host_is_superhost, y = avgResponse, fill = host_is_superhost)
) +
  geom_col(width = 0.5) +
  labs(
    title = "Average Response Rates of Superhosts and Non-Superhosts",
    y = "Response Rates (%)"
  ) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )

6. Let’s do some text analysis!!

library(tidyverse)

###/Users/armaanahmed/Desktop/listings.csv 
##/Users/armaanahmed/Desktop/reviews.csv 
##/Users/armaanahmed/Desktop/calendar.csv 
##install.packages("textdata")

## Load the review and listing exports and attach every review to its listing.
## The folder names contain plain spaces. A backslash followed by a space is
## not a recognised escape sequence in an R string and makes the script fail
## to parse, so the paths are written with ordinary spaces.
reviews <- read.csv("/Users/armaanahmed/Desktop/Data Viz AirBNB Data/Su/data2/reviews.csv")
listings <- read.csv("/Users/armaanahmed/Desktop/Data Viz AirBNB Data/Su/data2/listings.csv")
airbnb <- inner_join(listings, reviews, by = c("id" = "listing_id"))

## filter 2019-2020 data
## NOTE(review): `date` is compared against text bounds; this assumes it is
## stored as YYYY-MM-DD strings -- confirm against reviews.csv.
airbnb <- airbnb %>%
  filter(date > "2018-12-31" & date < "2021-01-01")
## How many properties does a host own?
## Step 1 collapses the reviews to one row per (host, listing) pair;
## step 2 counts those rows per host; hosts with most listings come first.
airbnb2 <- airbnb %>%
  count(host_id, id) %>%
  group_by(host_id) %>%
  count() %>%
  arrange(desc(n))

## Number of hosts for each portfolio size
table(airbnb2$n)
## 
##     1     2     3     4     5     6     7     8     9    10    11    12    13 
## 12132  1546   413   195    89    48    34    31    18     7     8     6     2 
##    14    15    16    17    18    20    21    22    23    24    26    29    30 
##     5     1     4     1     2     1     3     2     2     1     2     3     1 
##    31    32    34    35    36    37    40    78    91    98 
##     1     2     1     1     1     1     2     1     1     1

Cleaning and PreProcessing Text

library(tm)
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library(quanteda)
## Package version: 3.0.0
## Unicode version: 10.0
## ICU version: 61.1
## Parallel computing: 6 of 6 threads used.
## See https://quanteda.io for tutorials and examples.
## 
## Attaching package: 'quanteda'
## The following object is masked from 'package:tm':
## 
##     stopwords
## The following objects are masked from 'package:NLP':
## 
##     meta, meta<-
## keep only comments written entirely in ASCII characters
## (used here as a stand-in for "English")
airbnb3 <- airbnb[!grepl("[^\x01-\x7F]+", airbnb$comments), ]

## Clean the comment text in one pass. The order of the steps is:
## stop words (two lists), numbers, repeated whitespace, punctuation, lower case.
## NOTE(review): lower-casing happens after stop-word removal, so capitalised
## stop words may survive this step -- confirm that is acceptable.
airbnb3$comments <- airbnb3$comments %>%
  removeWords(stopwords(language = "en", source = "stopwords-iso")) %>%
  removeWords(stopwords(language = "en", source = "marimo")) %>%
  removeNumbers() %>%
  stripWhitespace() %>%
  removePunctuation() %>%
  tolower()

Sentiment Analysis

##install.packages("tidytext")
library(tidytext)

## One row per word of each cleaned comment, minus tidytext's stop words
tidy_ab <- airbnb3 %>%
  unnest_tokens(output = word, input = comments) %>%
  anti_join(stop_words, by = "word")

## Keep only the words that have an entry in the AFINN lexicon
afinn <- get_sentiments("afinn")
tidy_ab_sent <- tidy_ab %>%
  inner_join(afinn, by = "word")

## Mean word score of every host/reviewer pair, repeated on each of its rows
sent_by_rev <- mutate(
  group_by(tidy_ab_sent, host_id, reviewer_id),
  rev_sent = mean(value)
)
summary(tidy_ab_sent$price)
##    Length     Class      Mode 
##    652281 character character
## 75% of the properties are cheaper than $145 per night

 
## Attach the mean word score of each host/reviewer pair to all of its rows
tidy_ab_sent <- mutate(
  group_by(tidy_ab_sent, host_id, reviewer_id),
  rev_sent = mean(value)
)

## Bin the mean scores into unit-wide intervals from -5 to 5 and tabulate them
tidy_ab_sent$sentiment_fac <- cut(tidy_ab_sent$rev_sent, breaks = seq(-5, 5))
table(tidy_ab_sent$sentiment_fac)
## 
## (-5,-4] (-4,-3] (-3,-2] (-2,-1]  (-1,0]   (0,1]   (1,2]   (2,3]   (3,4]   (4,5] 
##      18     638    2334    5194   13835   49593  230525  318297   31337     509
## Label every scored word: "good" when its AFINN value is positive, else "bad"
tidy_ab_sent$sentiment <- ifelse(tidy_ab_sent$value > 0, "good", "bad")

## Box plot of nightly price per neighbourhood group, split by sentiment label.
## `price` is stored as text (summary() reports it as character), so comparing
## it with 200 would be a string comparison and would not keep the listings
## under $200. The currency symbol and thousands separators are stripped and
## the column is converted to a number before filtering and plotting.
## The data is ungrouped first: grouping has no effect on the plot.
tidy_ab_sent %>%
  ungroup() %>%
  mutate(price = as.numeric(gsub("[$,]", "", price))) %>%
  filter(price < 200) %>%
  ggplot(aes(x = neighbourhood_group_cleansed, y = price, color = sentiment)) +
  geom_boxplot() +
  labs(
    title = "Neighborhood groups and Price by sentiment",
    x = "Neighborhood Group",
    y = "Price per night($)"
  )

## Number of occurrences of each word under each sentiment label,
## most frequent combinations first
tidy_ab_combined <- tidy_ab_sent %>%
  group_by(word, sentiment) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
## `summarise()` has grouped output by 'word'. You can override using the `.groups` argument.
## Document-term matrix with one document per sentiment label and one term
## per word; cell values are the word counts
ab_dtm <- cast_dtm(
  tidy_ab_combined,
  document = sentiment,
  term = word,
  value = count
)
ab_dtm
## <<DocumentTermMatrix (documents: 2, terms: 1528)>>
## Non-/sparse entries: 1528/1528
## Sparsity           : 50%
## Maximal term length: 17
## Weighting          : term frequency (tf)
## Convert the document-term matrix to a dense matrix, then transpose it so
## that words are rows and the sentiment labels are columns
ab_m <- as.matrix(ab_dtm)
ab_tm <- t(ab_m)
Good Comments Word Cloud

What are the key words that are found in good comments?

library(wordcloud)
## Loading required package: RColorBrewer
## success
## Word cloud (blue) of up to 100 words labelled "good" that occur at least
## three times
with(
  filter(tidy_ab_combined, sentiment == "good"),
  wordcloud(
    words = word,
    freq = count,
    scale = c(4, .5),
    min.freq = 3,
    max.words = 100,
    random.order = FALSE,
    rot.per = .5,
    colors = "blue"
  )
)

Words like "clean", "nice", and "recommend" all come up! It seems that cleanliness, aesthetics, and social cues (like "recommend") are the most important aspects of a good review.

Bad Comments Word Cloud

What are the key words that are found in bad comments?

## success
## Word cloud (red) of up to 100 words labelled "bad" that occur at least
## three times
with(
  filter(tidy_ab_combined, sentiment == "bad"),
  wordcloud(
    words = word,
    freq = count,
    scale = c(4, .5),
    min.freq = 3,
    max.words = 100,
    random.order = FALSE,
    rot.per = .5,
    colors = "red"
  )
)

Words like "noisy", "bad", "dirty", "block", and "hard" come up in bad reviews! People want to have a nice, quiet, clean place to stay!

Dissimilar words Word Cloud
## Comparison cloud over the word-by-sentiment matrix, at most 100 words,
## one colour per column of ab_tm
comparison.cloud(
  term.matrix = ab_tm,
  scale = c(3.6, .5),
  max.words = 100,
  random.order = FALSE,
  rot.per = .5,
  colors = c("blue", "red"),
  title.size = 1
)

7. How does the distribution of AirBNB locations look throughout NYC?

library(readxl)
library(ggplot2)
library(ggthemes)
library(dplyr)
library(maps)
library(tidyverse)
library(tmap)
library(ggmap)
library(hablar)
library(maps)
library(tidyverse)
library(ggmap)
library(rgdal)
library(data.table)
library(devtools)
library(leaflet)
library(geojsonio)
library(readr)
library(RgoogleMaps)



## Re-read the three raw exports with readr.
## The folder name contains plain spaces. A backslash followed by a space is
## not a recognised escape sequence in an R string and makes the script fail
## to parse, so the paths are written with ordinary spaces.
reviews <- read_csv("/Users/armaanahmed/Desktop/untitled folder 2/reviews.csv")
calendar <- read_csv("/Users/armaanahmed/Desktop/untitled folder 2/calendar.csv")
listings <- read_csv("/Users/armaanahmed/Desktop/untitled folder 2/listings.csv")

## Replace `airbnb` with the rows of AB_US_2020.csv whose city is New York City
airbnb <- read_csv("/Users/armaanahmed/Documents/GitHub/Group_O_Airbnb/AB_US_2020.csv")
airbnb <- subset(airbnb, city == "New York City")


##Get rid of unnecessary data in park dataset


##Base layer: Stamen "toner-background" tiles for New York City at zoom 12
map_TS_st1 <- get_map(
  location = "New York City",
  zoom = 12,
  maptype = "toner-background",
  source = "stamen"
)
ggmap_TS_st1 <- ggmap(map_TS_st1)
ggmap_TS_st1

##Overlay one blue point per row of airbnb at its longitude/latitude
map2 <- ggmap_TS_st1 +
  geom_point(
    data = airbnb,
    mapping = aes(x = longitude, y = latitude),
    color = "blue",
    alpha = 0.9,
    size = 1
  )
map2

airbnbdt <- as.data.table(airbnb)

##let's stagger the price ranges
## Each price falls into exactly one left-closed bin:
##   [0, 100) "$99 Bargain", [100, 200) "A steal", [200, 300) "Kinda Pricey",
##   [300, 400) "Expensive", [400, Inf) "Ultra-expensive?"
## A price of exactly 400 is now labelled "Ultra-expensive?"; the earlier pair
## of tests (> 400 and < 400) matched neither side and left it as NA.
## Missing prices stay NA.
airbnb$pricerange <- as.character(cut(
  airbnb$price,
  breaks = c(-Inf, 100, 200, 300, 400, Inf),
  labels = c(
    "$99 Bargain",
    "A steal",
    "Kinda Pricey",
    "Expensive",
    "Ultra-expensive?"
  ),
  right = FALSE
))

##one colour per price range, taken from the "Set1" palette
library(RColorBrewer)
pal <- colorFactor("Set1", domain = airbnb$pricerange)
color_offsel1 <- pal(airbnb$pricerange)

##popup content: one HTML snippet per listing
content <- paste(
  "Check this AirBNB out!!", "<br/>",
  "Price:", airbnb$price, "<br/>",
  "Number of Reviews:", airbnb$number_of_reviews, "<br/>",
  "Type of Room:", airbnb$room_type, "<br/>"
)

##interactive map: one circle per listing, zoom limited to levels 12-18
interactiveairbnbmap <- leaflet(
  airbnb,
  options = leafletOptions(minZoom = 12, maxZoom = 18)
) %>%
  addTiles() %>%
  addCircles(
    lng = ~longitude,
    lat = ~latitude,
    color = color_offsel1,
    popup = content
  ) %>%
  addProviderTiles("NASAGIBS.ViirsEarthAtNight2012") %>%
  setView(lng = -73.96, lat = 40.78, zoom = 14)
interactiveairbnbmap

##same data with clustered circle markers and a legend for the price ranges
clusteredmap <- leaflet(airbnb, options = leafletOptions()) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~longitude,
    lat = ~latitude,
    color = color_offsel1,
    popup = content,
    clusterOptions = markerClusterOptions()
  ) %>%
  setView(lng = -73.96, lat = 40.78, zoom = 14) %>%
  addLegend(
    pal = pal,
    values = airbnb$pricerange,
    title = "AirBNB's in New York City <br/> Check it out!"
  ) %>%
  addProviderTiles("NASAGIBS.ViirsEarthAtNight2012")
clusteredmap
##It looks like a majority of AirBNBs are densely concentrated in either Manhattan or Long Island City, with listings becoming more and more scattered throughout the Bronx, Staten Island and Queens.
##Create a table with one row per distinct price and the mean of price in that group
##NOTE(review): the rows are grouped by the same column that is averaged, so
##Mean always equals price (or is NaN for missing prices). A different grouping
##column was probably intended -- confirm before relying on this table.
averagefines <- airbnb %>%
    group_by(price) %>%
    dplyr::summarize(Mean = mean(price, na.rm=TRUE))

##Bring the neighbourhood of each listing into airbnb, matching on id.
##Only the two columns needed for the join are kept from listings.
smalllistings <- select(listings, neighbourhood_cleansed, id)
airbnb <- airbnb %>%
  left_join(smalllistings, by = "id")

##Store the summary table as a plain data frame
averagefines <- data.frame(averagefines)


```r
##Mean listing price per neighbourhood, as columns Neighborhood and Price
airbnbprice <- aggregate(
  airbnb$price,
  by = list(Neighborhood = airbnb$neighbourhood_cleansed),
  FUN = mean
)
airbnbprice <- rename(airbnbprice, Price = x)

##Leave Fort Wadsworth out of the comparison
airbnbprice <- subset(airbnbprice, Neighborhood != "Fort Wadsworth")

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
## 
##     wind
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
##Scatter of average price per neighbourhood; x-axis labels and the legend are
##hidden because there is one entry per neighbourhood
s8 <- ggplot(airbnbprice, aes(x = Neighborhood, y = Price)) +
  geom_point(aes(color = Neighborhood, fill = Price)) +
  labs(
    title = "Average Price by Neighborhood",
    x = "Neighborhood",
    y = "Average Price"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )

##Convert the ggplot object to a plotly object and display it
int <- plotly_build(s8)
int

8. How do prices vary by borough/neighborhood for AirBNBs?

##Bubble chart of average price per neighbourhood: marker size and colour both
##follow Price; the x axis is hidden
pricebyneighborhoodplot <- plot_ly(
  airbnbprice,
  x = ~Neighborhood,
  y = ~Price,
  type = 'scatter',
  mode = 'markers',
  marker = list(
    size = ~Price/10,
    opacity = 0.5,
    color = ~Price,
    colors = 'Paired'
  )
)
pricebyneighborhoodplot <- layout(
  pricebyneighborhoodplot,
  title = 'Average Price by Neighborhood in NYC',
  xaxis = list(showgrid = FALSE, showticklabels = FALSE, visible = FALSE),
  yaxis = list(showgrid = FALSE)
)

pricebyneighborhoodplot
##add borough information
##the neighborhood-to-borough table was hand coded from wikipedia info on the
##different boroughs and the neighborhoods they contain
neighborhoodborough <- read_csv(
  file = "/Users/armaanahmed/Documents/GitHub/Group_O_Airbnb/neighborhoodbyborough.csv"
)
## Warning: Missing column names filled in: 'X3' [3], 'X4' [4], 'X5' [5], 'X6' [6],
## 'X7' [7], 'X8' [8], 'X9' [9], 'X10' [10], 'X11' [11], 'X12' [12], 'X13' [13]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Neighborhood = col_character(),
##   Borough = col_character(),
##   X3 = col_logical(),
##   X4 = col_logical(),
##   X5 = col_logical(),
##   X6 = col_logical(),
##   X7 = col_logical(),
##   X8 = col_logical(),
##   X9 = col_logical(),
##   X10 = col_logical(),
##   X11 = col_logical(),
##   X12 = col_logical(),
##   X13 = col_logical()
## )
##keep only the two named columns (the file also yields empty X3..X13 columns)
neighborhoodborough <- select(neighborhoodborough, Neighborhood, Borough)

##attach the borough to every neighbourhood's average price
airbnbprice <- airbnbprice %>%
  left_join(neighborhoodborough, by = "Neighborhood")

##list of neighborhoods that have to be manually added Battery Park City, Bedford Stuyvesant, Belmont, Bergen Beach, Brighton Beach, Bull's Head, Claremont Village, Columbia St, Concourse, Concourse Village, Ditmars Steinway, East Harlem, East Morrisania, Fieldston, Flatiron District, Fordham, Gramercy, Highbridge, Howland Hook, Hunts Point, Kingsbridge, Longwood, Melrose, Morris Heights, Morrisania, Mott Haven, Mount Eden, Mount Hope, Navy Yard, New Dorp Beach, Nolita, North Riverdale, Norwood, Port Morris, Prospect-Lefferts Gardens, Riverdale, Roosevelt Island, South Slope, Spuyten Duyvil, St. Albans, St. George, Stuyvesant Town, Throgs Neck, Tremont, Tribeca, University Heights, West Farms, Windsor Terrace, Woodlawn


##check which rows still contain an NA in any column
NAROWSDF <- airbnbprice[!complete.cases(airbnbprice), ]
NAROWSDF
##                  Neighborhood     Price Borough
## 7           Battery Park City 210.70175    <NA>
## 14         Bedford-Stuyvesant 109.65472    <NA>
## 17                    Belmont  49.11111    <NA>
## 19               Bergen Beach 129.60000    <NA>
## 23             Brighton Beach 113.58824    <NA>
## 27                Bull's Head  53.00000    <NA>
## 41          Claremont Village  71.04545    <NA>
## 48                Columbia St 182.80645    <NA>
## 50                  Concourse  80.51064    <NA>
## 51          Concourse Village  60.33333    <NA>
## 56           Ditmars Steinway  93.59487    <NA>
## 64                East Harlem 130.61490    <NA>
## 65            East Morrisania  86.58333    <NA>
## 75                  Fieldston 350.11111    <NA>
## 78          Flatiron District 338.63636    <NA>
## 81                    Fordham  78.46667    <NA>
## 88                   Gramercy 182.97642    <NA>
## 98                 Highbridge  76.44444    <NA>
## 102              Howland Hook 137.50000    <NA>
## 104               Hunts Point  49.77778    <NA>
## 114               Kingsbridge  88.90909    <NA>
## 121                  Longwood  97.60465    <NA>
## 127                   Melrose  54.28571    <NA>
## 134            Morris Heights 120.52941    <NA>
## 136                Morrisania  80.18182    <NA>
## 137                Mott Haven 107.48837    <NA>
## 138                Mount Eden 108.75000    <NA>
## 139                Mount Hope  86.26316    <NA>
## 141                 Navy Yard 134.50000    <NA>
## 144            New Dorp Beach  82.00000    <NA>
## 147                    Nolita 191.89362    <NA>
## 148           North Riverdale  96.00000    <NA>
## 149                   Norwood  73.19048    <NA>
## 157               Port Morris  74.38462    <NA>
## 161 Prospect-Lefferts Gardens 101.56129    <NA>
## 169                 Riverdale 121.33333    <NA>
## 171          Roosevelt Island 115.55814    <NA>
## 177            Sheepshead Bay 137.28283    <NA>
## 184               South Slope 150.15625    <NA>
## 186            Spuyten Duyvil  91.33333    <NA>
## 187                St. Albans 114.55357    <NA>
## 188                St. George  91.45714    <NA>
## 190           Stuyvesant Town 175.17308    <NA>
## 195               Throgs Neck  79.04545    <NA>
## 199                   Tremont  78.71429    <NA>
## 200                   Tribeca 408.27083    <NA>
## 203        University Heights  52.31250    <NA>
## 211                West Farms  65.00000    <NA>
## 219           Windsor Terrace 135.24051    <NA>
## 221                  Woodlawn  53.75000    <NA>
##add their correct borough
## Neighborhoods that found no match in the borough file, with the borough each
## belongs to. One lookup replaces fifty separate assignments.
## Corrections compared with the earlier hand-written assignments:
##   * "Bull's Head" is keyed with an apostrophe; the earlier key used a
##     backtick, never matched the data, and left the row as NA.
##   * Ditmars Steinway is in Queens (it was set to Brooklyn).
##   * Fieldston is in the Bronx (it was set to Manhattan).
borough_fixes <- c(
  "Battery Park City" = "Manhattan",
  "Bedford-Stuyvesant" = "Brooklyn",
  "Belmont" = "Bronx",
  "Bergen Beach" = "Brooklyn",
  "Brighton Beach" = "Brooklyn",
  "Bull's Head" = "Staten Island",
  "Claremont Village" = "Bronx",
  "Columbia St" = "Brooklyn",
  "Concourse" = "Bronx",
  "Concourse Village" = "Bronx",
  "Ditmars Steinway" = "Queens",
  "East Harlem" = "Manhattan",
  "East Morrisania" = "Bronx",
  "Fieldston" = "Bronx",
  "Flatiron District" = "Manhattan",
  "Fordham" = "Bronx",
  "Gramercy" = "Manhattan",
  "Highbridge" = "Bronx",
  "Howland Hook" = "Staten Island",
  "Hunts Point" = "Bronx",
  "Kingsbridge" = "Bronx",
  "Longwood" = "Bronx",
  "Melrose" = "Bronx",
  "Morris Heights" = "Bronx",
  "Morrisania" = "Bronx",
  "Mott Haven" = "Bronx",
  "Mount Eden" = "Bronx",
  "Mount Hope" = "Bronx",
  "Navy Yard" = "Brooklyn",
  "New Dorp Beach" = "Staten Island",
  "Nolita" = "Manhattan",
  "North Riverdale" = "Bronx",
  "Norwood" = "Bronx",
  "Port Morris" = "Bronx",
  "Prospect-Lefferts Gardens" = "Brooklyn",
  "Riverdale" = "Bronx",
  "Roosevelt Island" = "Manhattan",
  "Sheepshead Bay" = "Brooklyn",
  "South Slope" = "Brooklyn",
  "Spuyten Duyvil" = "Bronx",
  "St. Albans" = "Queens",
  "St. George" = "Staten Island",
  "Stuyvesant Town" = "Manhattan",
  "Throgs Neck" = "Bronx",
  "Tremont" = "Bronx",
  "Tribeca" = "Manhattan",
  "University Heights" = "Bronx",
  "West Farms" = "Bronx",
  "Windsor Terrace" = "Brooklyn",
  "Woodlawn" = "Bronx"
)

## Overwrite Borough only for the neighborhoods listed above; every other row
## keeps the value that came from the join.
needs_fix <- airbnbprice$Neighborhood %in% names(borough_fixes)
airbnbprice$Borough[needs_fix] <-
  unname(borough_fixes[airbnbprice$Neighborhood[needs_fix]])

##order the rows by borough name so each borough's neighborhoods sit together
airbnbprice <- with(airbnbprice, airbnbprice[order(Borough), ])
str(airbnbprice)
## 'data.frame':    222 obs. of  3 variables:
##  $ Neighborhood: chr  "Allerton" "Baychester" "Belmont" "Bronxdale" ...
##  $ Price       : num  109.7 85.5 49.1 55.9 72 ...
##  $ Borough     : chr  "Bronx" "Bronx" "Bronx" "Bronx" ...
##let's add some color: one fixed colour name per borough
##(rows whose Borough is missing or not listed get NA)
airbnbprice$Color <- unname(
  c(
    "Manhattan" = "darksalmon",
    "Queens" = "aquamarine",
    "Bronx" = "yellow",
    "Brooklyn" = "violet",
    "Staten Island" = "lavender"
  )[airbnbprice$Borough]
)

##lets plot it again by borough
## Bubble chart of average price per neighbourhood; marker size follows Price
## and marker colour is the per-borough colour stored in the Color column.
## `hoverinfo` is passed to plot_ly() itself so the tooltip shows only the
## custom text (it was previously swallowed by paste() as one more string).
## The marker list keeps only size, opacity and color; `fill` and `colors`
## are not marker attributes.
pricebyneighborhoodplot <- plot_ly(
  airbnbprice,
  x = ~Neighborhood,
  y = ~Price,
  type = "scatter",
  mode = "markers",
  text = ~paste0(
    "Neighborhood: ", Neighborhood,
    "<br>Borough: ", Borough,
    "<br>Average Price: ", round(Price, 2)
  ),
  hoverinfo = "text",
  marker = list(size = ~Price / 10, opacity = 0.5, color = ~Color)
)
pricebyneighborhoodplot <- pricebyneighborhoodplot %>%
  layout(
    title = "Average AirBNB Price by Neighborhood in NYC",
    xaxis = list(showgrid = FALSE, showticklabels = FALSE, visible = FALSE),
    yaxis = list(showgrid = FALSE)
  )

pricebyneighborhoodplot
##It looks like Manhattan takes the cake when it comes to having the most expensive average Airbnb rates, with Staten Island coming surprisingly close. Queens, the Bronx, and Brooklyn all have generally lower average rates.

Our choropleth maps are attached separately because the HTML files were too large. In 2019, AirBNB had more rooms and vacancies throughout NYC, while in 2020 the number of rooms and vacancies decreased due to COVID-19. See plotly_num_airbnb19.html and plotly_num_airbnb20.html, which are also in our GitHub repository!

Thanks for a great semester!!